library(clusterProfiler)
library(org.Mm.eg.db)
library(ggplot2)
library(gprofiler2)
Input for run_select_pattern.sh: RSEM result (GeneMat_HFD_Lingon_LDF.Ebseqresults/GeneMat_HFD_Lingon_LDF.Ebseqresults_FDR_0.05.tab)
Output: Gene list with EnsemblID and GeneSymbol, classified as diffent patterns (Output folder: Matrix/pattern)
#seperate genes based on different patterns
scr/run_select_pattern.sh
#full list of expressed genes
cut -f 1 Matrix/GeneMat_HFD_Lingon_LDF.Ebseqresults | \
sed 's/_/\t/;s/"//g' | \
tail -n +2 \
> Matrix/full_gene_list2.csv
pattern2.FDR0.05 <- read.table("Matrix/pattern/pattern2_FDR0.05.csv",
header = TRUE)
pattern3.FDR0.05 <- read.table("Matrix/pattern/pattern3_FDR0.05.csv",
header = TRUE)
pattern4.FDR0.05 <- read.table("Matrix/pattern/pattern4_FDR0.05.csv",
header = TRUE)
pattern_table <- read.table(gzfile("Matrix/GeneMat_HFD_Lingon_LDF.Ebseqresults.pattern.gz"),
header = TRUE)
full_gene <- read.table("Matrix/full_gene_list.csv", header = FALSE)
names(full_gene) <- c("EnsemblID","GeneSymbol")
Summarize the number of genes classified to each pattern (FDR cutoff = 0.05)
names(pattern_table) <- c("HDF","Lingon","LFD")
RSEM_summary <-data.frame(c(0,155,40,123,0,318),
row.names = c("pattern1.FDR0.05","pattern2.FDR0.05",
"pattern3.FDR0.05","pattern4.FDR0.05",
"pattern5.FDR0.05","Sum"))
names(RSEM_summary) <- c("number of genes")
knitr::kable(pattern_table)
| HDF | Lingon | LFD | |
|---|---|---|---|
| Pattern1 | 1 | 1 | 1 |
| Pattern2 | 1 | 1 | 2 |
| Pattern3 | 1 | 2 | 1 |
| Pattern4 | 1 | 2 | 2 |
| Pattern5 | 1 | 2 | 3 |
knitr::kable(RSEM_summary)
| number of genes | |
|---|---|
| pattern1.FDR0.05 | 0 |
| pattern2.FDR0.05 | 155 |
| pattern3.FDR0.05 | 40 |
| pattern4.FDR0.05 | 123 |
| pattern5.FDR0.05 | 0 |
| Sum | 318 |
Explanation:
ego <- function(pattern_data){
ego_pattern <- enrichGO(gene = pattern_data[,1], # test gene list
universe = full_gene[,1], # background gene list
OrgDb = org.Mm.eg.db,
keyType = "ENSEMBL",
ont = 'ALL',
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
file_name <- paste("results/clusterprofiler/",
deparse(substitute(pattern_data)),
"_GO_enrich.csv", sep = "")
write.table(as.data.frame(ego_pattern), file_name, row.names = F, sep = "\t")
return(ego_pattern)
}
full_entrez <- bitr(full_gene[,1],
fromType = "ENSEMBL",
toType = "ENTREZID",
OrgDb = org.Mm.eg.db)
## 'select()' returned 1:many mapping between keys and columns
eKEGG <- function(pattern_data){
pattern_entrez <- bitr(pattern_data[,1],
fromType = "ENSEMBL",
toType = c("ENTREZID"),
OrgDb = org.Mm.eg.db)
eKEGG_pattern <- enrichKEGG(gene = pattern_entrez[,2], # test gene list
universe = full_entrez[,2], # background gene list
organism='mmu',
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
eKEGG_pattern_genes <- setReadable(eKEGG_pattern,
OrgDb = org.Mm.eg.db,
keyType="ENTREZID")
file_name <- paste("results/clusterprofiler/",
deparse(substitute(pattern_data)),
"_KEGG_enrich.csv", sep = "")
write.table(as.data.frame(eKEGG_pattern_genes),
file_name, row.names = F, sep = "\t")
return(eKEGG_pattern_genes)
}
ego_pattern2.FDR0.05 <- ego(pattern2.FDR0.05)
ego_pattern2.FDR0.05_n <- dim(ego_pattern2.FDR0.05)[1]
ego_pattern2.FDR0.05_n
## [1] 30
Gene counts and significance of enrichment:
barplot(ego_pattern2.FDR0.05, showCategory=ego_pattern2.FDR0.05_n)
Gene ratio, counts and significance of enrichment:
dotplot(ego_pattern2.FDR0.05, showCategory=ego_pattern2.FDR0.05_n)
Differentially expressed genes that belong enriched terms:
heatplot(ego_pattern2.FDR0.05)
ego_pattern3.FDR0.05 <- ego(pattern3.FDR0.05)
ego_pattern3.FDR0.05_n <- dim(ego_pattern3.FDR0.05)[1]
ego_pattern3.FDR0.05_n
## [1] 10
Gene counts and significance of enrichment:
barplot(ego_pattern3.FDR0.05,showCategory=ego_pattern3.FDR0.05_n,drop=T)
Gene ratio, counts and significance of enrichment:
dotplot(ego_pattern3.FDR0.05,showCategory=ego_pattern3.FDR0.05_n)
Differentially expressed genes that belong enriched terms:
heatplot(ego_pattern3.FDR0.05)
ego_pattern4.FDR0.05 <- ego(pattern4.FDR0.05)
ego_pattern4.FDR0.05_n <- dim(ego_pattern4.FDR0.05)[1]
ego_pattern4.FDR0.05_n
## [1] 27
Gene counts and significance of enrichment:
barplot(ego_pattern4.FDR0.05,showCategory=ego_pattern4.FDR0.05_n,drop=T)
Gene ratio, counts and significance of enrichment:
dotplot(ego_pattern4.FDR0.05,showCategory=ego_pattern4.FDR0.05_n)
Differentially expressed genes that belong enriched terms:
heatplot(ego_pattern4.FDR0.05)
eKEGG_pattern2.FDR0.05 <- eKEGG(pattern2.FDR0.05)
knitr::kable(as.data.frame(eKEGG_pattern2.FDR0.05))
| ID | Description | GeneRatio | BgRatio | pvalue | p.adjust | qvalue | geneID | Count | |
|---|---|---|---|---|---|---|---|---|---|
| mmu04610 | mmu04610 | Complement and coagulation cascades | 10/68 | 88/6987 | 0 | 1.6e-06 | 1.6e-06 | Serpine1/Fga/Kng1/Fgg/Fgb/Serpina1e/Serpina1c/Plg/F2/F2r | 10 |
eKEGG_pattern3.FDR0.05 <- eKEGG(pattern3.FDR0.05)
knitr::kable(as.data.frame(eKEGG_pattern3.FDR0.05))
| ID | Description | GeneRatio | BgRatio | pvalue | p.adjust | qvalue | geneID | Count |
|---|
eKEGG_pattern4.FDR0.05 <- eKEGG(pattern4.FDR0.05)
knitr::kable(as.data.frame(eKEGG_pattern4.FDR0.05))
| ID | Description | GeneRatio | BgRatio | pvalue | p.adjust | qvalue | geneID | Count | |
|---|---|---|---|---|---|---|---|---|---|
| mmu04015 | mmu04015 | Rap1 signaling pathway | 9/51 | 201/6987 | 1.20e-05 | 0.0022877 | 0.0016604 | Prkd2/Mapk3/Calml3/Arap3/Fgfr4/Pik3r3/Adcy8/Cdh1/Gnai2 | 9 |
| mmu04024 | mmu04024 | cAMP signaling pathway | 8/51 | 201/6987 | 8.97e-05 | 0.0085247 | 0.0061869 | Mapk3/Calml3/Arap3/Pde4b/Pik3r3/Ppp1r1b/Adcy8/Gnai2 | 8 |
namesP <- c("term_id", "source", "p_value", 'term_size', 'intersection_size',
"term_name","intersection")
ggo <- function(pattern_data){
gost_pattern <- gost(pattern_data[,2],
domain_scope = "custom",
custom_bg = full_gene[,2],
sources = c('GO', 'KEGG', 'REAC', 'WP'),
organism ='mmusculus',
significant = TRUE,
evcodes=TRUE)
file_name <- paste("results/gprofiler/",
deparse(substitute(pattern_data)),
"_gProfiler_enrich.csv", sep = "")
write.table(as.data.frame(gost_pattern$result[,namesP]),
file_name, row.names = F, sep = "\t")
return(gost_pattern)
}
gost_pattern2 <- ggo(pattern2.FDR0.05)
gostplot(gost_pattern2)
gost_pattern3 <- ggo(pattern3.FDR0.05)
gostplot(gost_pattern3)
gost_pattern4 <- ggo(pattern4.FDR0.05)
gostplot(gost_pattern4)
results/clusterprofiler/ - clusterProfiler GO and KEGG enrichment results.results/gprofiler/ - gProfiler GO, KEGG, Reactome, and WikiPathways enrichment results.